# gvc_agora_opentargets

# Setup environment ----
library(tidyverse)
library(janitor)
library(broom)
library(readxl)
library(jsonlite)
library(gprofiler2)

# Use one ggplot2 theme throughout and fix the RNG seed.
theme_set(theme_bw())
set.seed(666)
# Read and prep data ----

# GVC ----
# Genes within a 1 Mb window of GVC loci (on each side? -- to be confirmed),
# provided by Fanny:
gvc <- read_xlsx("GVC_1Mb_comparison_050224.xlsx") |>
  clean_names() |>
  # Split gene_id into the ID proper and a trailing "version" piece, then drop
  # the version (presumably versioned Ensembl IDs -- confirm against the file).
  separate(gene_id, c("gene_id", "version")) |>
  select(-version, -agora_nominated_list, -opentarget_info)
gvc

# One row per gene ID (first occurrence kept), sorted by gene symbol.
gvc.genes <- gvc |>
  distinct(gene_id, .keep_all = TRUE) |>
  select(gene_id, gene_symbol) |>
  arrange(gene_symbol)
gvc.genes

# Agora ----
# Alzheimer's disease gene prioritization scores from Agora:
ago1 <- read_json("syn25741025.overall_scores.json", simplifyVector = TRUE) |>
  as_tibble()
ago1

# Alzheimer's disease genes (AMPAD Agora) from Fanny:
ago2 <- read_csv("AMPAD_agora_032124_gene-list.csv")
ago2

# Keep Agora scores only for genes whose symbol is on the AMPAD gene list.
ago <- ago1 |>
  filter(hgnc_symbol %in% ago2$`Gene Symbol`)

# OpenTargets ----
# Alzheimer's disease gene prioritization scores from OpenTargets:
ot <- read_tsv(
  "OT-MONDO_0004975-associated-targets-6_4_2024-v24_03.tsv",
  show_col_types = FALSE,
  na = "No data"
)
ot

# Add Ensembl gene IDs by converting the OpenTargets gene symbols with
# g:Convert, then re-attaching the original OpenTargets columns.
otcols <- colnames(ot)
otensg <- gconvert(
  query = ot$symbol,
  organism = "hsapiens",
  target = "ENSG",
  mthreshold = Inf,
  filter_na = TRUE
) |>
  # input_number is the position of the symbol in the query, i.e. the row of
  # `ot`; make it character so it matches the row-name column created below.
  mutate(input_number = as.character(input_number)) |>
  left_join(
    ot |> rownames_to_column(var = "input_number"),
    by = "input_number"
  ) |>
  # all_of() makes explicit that otcols is an external character vector.
  select(ensembl_gene_id = target, all_of(otcols))
otensg

# Annotate GVC genes with Agora and OpenTargets scores ----
# Number of GVC gene IDs that also appear in the Agora table and in the
# OpenTargets table, respectively (printed values from the rendered run).
sum(gvc.genes$gene_id %in% ago$ensembl_gene_id)
#> [1] 116
sum(gvc.genes$gene_id %in% otensg$ensembl_gene_id)
#> [1] 405

# Arrange by Agora's genetics_score and OpenTargets' otGeneticsPortal:
# NOTE(review): these left joins may return more than one row per gene if
# either score table repeats an Ensembl ID -- confirm before treating rows of
# d1 as unique genes.
d1 <- gvc.genes |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
d1

# Overlap between GVC genes and Agora and OpenTargets genes ----
# Named gene ID sets to compare.
x <- list(
  "GVC" = gvc.genes$gene_id,
  "Agora" = ago$ensembl_gene_id,
  "OpenTargets" = otensg$ensembl_gene_id
)

library(VennDiagram)
grid.newpage()
v <- venn.diagram(
  x,
  fill = c("#FF0000", "#00FF00", "#0000FF"),
  filename = NULL # no output file; the result is drawn with grid.draw() below
)
grid.draw(v)

# Partition table; the `..set..` and `..values..` columns are used by the ORA
# queries that follow.
p <- get.venn.partitions(x)
p

# Perform ORA of overlap genes ----
# Helpers shared by every over-representation analysis (ORA) below.

# Gene IDs of one Venn partition, ordered by genetics_score and then
# otGeneticsPortal (both descending), with duplicate IDs removed.
#   partitions  data frame returned by get.venn.partitions()
#   set_name    value of `..set..` that identifies the partition
#   agora       Agora score table keyed by ensembl_gene_id
#   opentargets OpenTargets score table keyed by ensembl_gene_id
# Returns a character vector of gene IDs (possibly empty).
partition_query <- function(partitions, set_name, agora, opentargets) {
  partitions |>
    filter(..set.. == .env$set_name) |>
    unnest(..values..) |>
    select(gene_id = ..values..) |>
    left_join(agora, by = join_by(gene_id == ensembl_gene_id)) |>
    left_join(opentargets, by = join_by(gene_id == ensembl_gene_id)) |>
    arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
    distinct(gene_id) |>
    pull(gene_id)
}

# Run gost() on an ordered gene list with the settings used throughout this
# document. Returns the gost() result, or NULL when the query is empty or
# gost() reports no significant terms.
run_ora <- function(query) {
  if (length(query) == 0) {
    message("Empty query; skipping ORA.")
    return(NULL)
  }
  gost(
    query = query,
    organism = "hsapiens",
    domain_scope = "annotated",
    exclude_iea = TRUE,
    ordered_query = TRUE,
    significant = TRUE,
    user_threshold = 0.005,
    correction_method = "fdr"
  )
}

# Result table with the term columns first; NULL when there is no result.
ora_table <- function(gostres) {
  if (is.null(gostres)) {
    return(invisible(NULL))
  }
  gostres$result |>
    select(term_name, term_id, source, everything())
}

# Interactive gostplot() of the result; NULL when there is no result.
ora_plot <- function(gostres) {
  if (is.null(gostres)) {
    return(invisible(NULL))
  }
  gostplot(gostres, capped = FALSE, interactive = TRUE)
}

# GVC ∩ Agora ∩ OpenTargets
query <- partition_query(p, "GVC∩Agora∩OpenTargets", ago, otensg)
gostres <- run_ora(query)
ora_table(gostres)
ora_plot(gostres)

# save overlap gene ids for later
overlap_gene_ids <- query

# (Agora ∩ OpenTargets) ∖ (GVC)
query <- partition_query(p, "(Agora∩OpenTargets)∖(GVC)", ago, otensg)
gostres <- run_ora(query)
ora_table(gostres)
ora_plot(gostres)

# (GVC ∩ OpenTargets) ∖ (Agora)
query <- partition_query(p, "(GVC∩OpenTargets)∖(Agora)", ago, otensg)
gostres <- run_ora(query)
ora_table(gostres)
ora_plot(gostres)

# (OpenTargets) ∖ (GVC ∪ Agora)
query <- partition_query(p, "(OpenTargets)∖(GVC∪Agora)", ago, otensg)
gostres <- run_ora(query)
ora_table(gostres)
ora_plot(gostres)

# (GVC ∩ Agora) ∖ (OpenTargets)
query <- partition_query(p, "(GVC∩Agora)∖(OpenTargets)", ago, otensg)
gostres <- run_ora(query)
ora_table(gostres)
ora_plot(gostres)

# (Agora) ∖ (GVC ∪ OpenTargets)
query <- partition_query(p, "(Agora)∖(GVC∪OpenTargets)", ago, otensg)
gostres <- run_ora(query)
ora_table(gostres)
ora_plot(gostres)

# (GVC) ∖ (Agora ∪ OpenTargets)
query <- partition_query(p, "(GVC)∖(Agora∪OpenTargets)", ago, otensg)
gostres <- run_ora(query)
ora_table(gostres)
ora_plot(gostres)

# Perform ORA of GVC genes sorted by Agora or OpenTargets genetics scores ----

# Agora
query <- d1 |>
  arrange(desc(genetics_score)) |>
  distinct(gene_id) |>
  pull(gene_id)
gostres <- run_ora(query)
ora_table(gostres)
ora_plot(gostres)

# OpenTargets
query <- d1 |>
  arrange(desc(otGeneticsPortal)) |>
  distinct(gene_id) |>
  pull(gene_id)
gostres <- run_ora(query)
ora_table(gostres)
ora_plot(gostres)

# Correlation of Agora and OpenTargets scores in GVC genes ----
# Kendall rank correlation between two columns of `data`, computed on the rows
# where both are non-missing. Returns the tidied cor.test() result.
#   data  data frame holding both columns
#   x, y  unquoted column names
kendall_cor <- function(data, x, y) {
  data |>
    drop_na({{ x }}, {{ y }}) |>
    summarize(
      cor = tidy(cor.test({{ x }}, {{ y }}, method = "kendall"))
    ) |>
    unnest(cor)
}

# NOTE(review): d1 and d2 are not deduplicated by gene here (the ORA queries
# apply distinct(gene_id), these correlations do not) -- confirm that repeated
# gene rows are intended to count more than once.

# genetics_score vs. otGeneticsPortal (printed values from the rendered run).
d1 |> nrow()
#> [1] 1345
d1 |> drop_na(genetics_score, otGeneticsPortal) |> nrow()
#> [1] 56
kendall_cor(d1, genetics_score, otGeneticsPortal)

# target_risk_score vs. globalScore.
d1 |> nrow()
#> [1] 1345
d1 |> drop_na(target_risk_score, globalScore) |> nrow()
#> [1] 75
kendall_cor(d1, target_risk_score, globalScore)

# Correlation of Agora and OpenTargets scores overall ----
d2 <- ago |>
  left_join(otensg, by = "ensembl_gene_id") |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
d2

# genetics_score vs. otGeneticsPortal.
d2 |> nrow()
#> [1] 926
d2 |> drop_na(genetics_score, otGeneticsPortal) |> nrow()
#> [1] 75
kendall_cor(d2, genetics_score, otGeneticsPortal)

# target_risk_score vs. globalScore.
d2 |> nrow()
#> [1] 926
d2 |> drop_na(target_risk_score, globalScore) |> nrow()
#> [1] 484
kendall_cor(d2, target_risk_score, globalScore)

# Print environment ----
# Record the R session; the lines that follow are the output captured when the
# document was rendered.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.4.1 (2024-06-14)
os macOS Sonoma 14.5
system aarch64, darwin20
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/New_York
date 2024-06-28
pandoc 3.1.11 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
backports 1.5.0 2024-05-23 [1] CRAN (R 4.4.0)
bit 4.0.5 2022-11-15 [1] CRAN (R 4.4.0)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.4.0)
bitops 1.0-7 2021-04-24 [1] CRAN (R 4.4.0)
broom * 1.0.6 2024-05-17 [1] CRAN (R 4.4.0)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.4.0)
cli 3.6.3 2024-06-21 [1] CRAN (R 4.4.0)
colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.4.0)
crayon 1.5.3 2024-06-20 [1] CRAN (R 4.4.0)
crosstalk 1.2.1 2023-11-23 [1] CRAN (R 4.4.0)
data.table 1.15.4 2024-03-30 [1] CRAN (R 4.4.0)
digest 0.6.36 2024-06-23 [1] CRAN (R 4.4.0)
dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.4.0)
evaluate 0.24.0 2024-06-10 [1] CRAN (R 4.4.0)
fansi 1.0.6 2023-12-08 [1] CRAN (R 4.4.0)
fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.4.0)
forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.4.0)
formatR 1.14 2023-01-17 [1] CRAN (R 4.4.0)
futile.logger * 1.4.3 2016-07-10 [1] CRAN (R 4.4.0)
futile.options 1.0.1 2018-04-20 [1] CRAN (R 4.4.0)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.4.0)
ggplot2 * 3.5.1 2024-04-23 [1] CRAN (R 4.4.0)
glue 1.7.0 2024-01-09 [1] CRAN (R 4.4.0)
gprofiler2 * 0.2.3 2024-02-23 [1] CRAN (R 4.4.0)
gtable 0.3.5 2024-04-22 [1] CRAN (R 4.4.0)
hms 1.1.3 2023-03-21 [1] CRAN (R 4.4.0)
htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.4.0)
httpuv 1.6.15 2024-03-26 [1] CRAN (R 4.4.0)
httr 1.4.7 2023-08-15 [1] CRAN (R 4.4.0)
janitor * 2.2.0 2023-02-02 [1] CRAN (R 4.4.0)
jsonlite * 1.8.8 2023-12-04 [1] CRAN (R 4.4.0)
knitr 1.47 2024-05-29 [1] CRAN (R 4.4.0)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.4.0)
lambda.r 1.2.4 2019-09-18 [1] CRAN (R 4.4.0)
later 1.3.2 2023-12-06 [1] CRAN (R 4.4.0)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.4.0)
lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.4.0)
lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.4.0)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.4.0)
mime 0.12 2021-09-28 [1] CRAN (R 4.4.0)
munsell 0.5.1 2024-04-01 [1] CRAN (R 4.4.0)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.4.0)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.4.0)
plotly 4.10.4 2024-01-13 [1] CRAN (R 4.4.0)
promises 1.3.0 2024-04-05 [1] CRAN (R 4.4.0)
purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.4.0)
R6 2.5.1 2021-08-19 [1] CRAN (R 4.4.0)
Rcpp 1.0.12 2024-01-09 [1] CRAN (R 4.4.0)
RCurl 1.98-1.14 2024-01-09 [1] CRAN (R 4.4.0)
readr * 2.1.5 2024-01-10 [1] CRAN (R 4.4.0)
readxl * 1.4.3 2023-07-06 [1] CRAN (R 4.4.0)
rlang 1.1.4 2024-06-04 [1] CRAN (R 4.4.0)
rmarkdown 2.27 2024-05-17 [1] CRAN (R 4.4.0)
rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.4.0)
scales 1.3.0 2023-11-28 [1] CRAN (R 4.4.0)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.4.0)
shiny 1.8.1.1 2024-04-02 [1] CRAN (R 4.4.0)
snakecase 0.11.1 2023-08-27 [1] CRAN (R 4.4.0)
stringi 1.8.4 2024-05-06 [1] CRAN (R 4.4.0)
stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.4.0)
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.4.0)
tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.4.0)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.4.0)
tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.4.0)
timechange 0.3.0 2024-01-18 [1] CRAN (R 4.4.0)
tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.4.0)
utf8 1.2.4 2023-10-22 [1] CRAN (R 4.4.0)
vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.4.0)
VennDiagram * 1.7.3 2022-04-12 [1] CRAN (R 4.4.0)
viridisLite 0.4.2 2023-05-02 [1] CRAN (R 4.4.0)
vroom 1.6.5 2023-12-05 [1] CRAN (R 4.4.0)
withr 3.0.0 2024-01-16 [1] CRAN (R 4.4.0)
xfun 0.45 2024-06-16 [1] CRAN (R 4.4.0)
xtable 1.8-4 2019-04-21 [1] CRAN (R 4.4.0)
yaml 2.3.8 2023-12-11 [1] CRAN (R 4.4.0)
[1] /Users/marcoe02/.Rlib
[2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
──────────────────────────────────────────────────────────────────────────────